## Pseudocode for building matching dataset
import json
from operator import itemgetter

def _buildBaseMatch(cosponsorship, congress):
    """Build the base match data for one congress.

    Every ordered pair (legA, legB) drawn from the same chamber's entry in
    ``all_legislators`` for ``congress`` is considered (a legislator is also
    paired with themself, exactly as the loops are written).  A pair is
    recorded once per standing committee that satisfies all of:

    * ``committees[committee]`` has entries for congress - 1, congress and
      congress + 1;
    * neither legislator appears in any of those three entries;
    * both legislators appear in ``all_legislators`` for the chamber in
      congress - 1 and congress + 1.

    Relies on module-level names that are defined outside this block:
    ``all_legislators``, ``standing_committees``, ``committees``, ``party``,
    ``cs_ceiling``, ``resolveParty``, ``countCs`` and ``commConnect``.

    Returns a dict mapping the profile string
    ``"congress,committee,chamber,trim_cs,partyA,partyB"`` to a list of
    ``[legA, legB, cs, sameAcs, oppAcs, sameBcs, oppBcs]`` rows, where every
    value after the two legislators is stringified.
    """
    window = (congress - 1, congress, congress + 1)
    window_set = set(window)
    match_data = {}

    for chamber in ['senate', 'house']:
        # The window test depends only on the committee, so do it once per
        # chamber instead of once per legislator pair.
        eligible = [committee for committee in standing_committees[chamber]
                    if window_set.issubset(committees[committee].keys())]
        if not eligible:
            # No committee can produce a row for this chamber.
            continue

        members = all_legislators[chamber][congress]
        for legA in members:
            try:
                cosponsors = cosponsorship[legA]
            except LookupError:
                # legA has no cosponsorship record: nothing to match on.
                continue
            partyA = resolveParty(legA, congress, party)

            for legB in members:
                partyB = resolveParty(legB, congress, party)

                # Cosponsorship count between the pair, capped at cs_ceiling
                # for the profile key.  Pairs for which the count cannot be
                # computed are skipped (best effort, as before), but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                try:
                    cs = countCs(cosponsors, legB, congress)
                    trim_cs = str(min(cs, cs_ceiling))
                except Exception:
                    continue

                # Both legislators must be in the chamber in the congress
                # before and the congress after.
                if (legA not in all_legislators[chamber][congress - 1] or
                        legA not in all_legislators[chamber][congress + 1] or
                        legB not in all_legislators[chamber][congress - 1] or
                        legB not in all_legislators[chamber][congress + 1]):
                    continue

                for committee in eligible:
                    rosters = committees[committee]
                    # Keep only committees on which neither A nor B served
                    # during the three-congress window.
                    if any(legA in rosters[c] or legB in rosters[c]
                           for c in window):
                        continue

                    # The way it is set up, switchers will be excluded for a
                    # fairly wide radius.  But that's ok with me.
                    sameAcs, oppAcs = commConnect(legA, rosters[congress],
                                                  congress, party, cosponsorship)
                    sameBcs, oppBcs = commConnect(legB, rosters[congress],
                                                  congress, party, cosponsorship)

                    profile = ','.join([str(congress), committee, chamber,
                                        trim_cs, partyA, partyB])
                    match_data.setdefault(profile, []).append(
                        [legA, legB, str(cs), str(sameAcs), str(oppAcs),
                         str(sameBcs), str(oppBcs)])

    return match_data


def _addAgreement(match_data, congress):
    """Append an agreement value to every pair in ``match_data``.

    For each chamber and each of its standing committees, the committee's
    ``agree-<committee>.json`` file is loaded and ``findAgreeRate`` is called
    for every pair whose profile belongs to that committee *and* that
    chamber, using ``congress - 1``.  The first element of the result is
    stringified and appended to the pair's row.  Pairs for which
    ``findAgreeRate`` fails are dropped.

    NOTE(review): the profile is parsed with ``split(',')``, which assumes
    committee names contain no commas -- confirm against the data.

    Returns a new dict with the same profile keys (for profiles whose
    committee is a standing committee of their chamber).
    """
    new_match_data = {}

    for chamber in ['house', 'senate']:
        for committee in standing_committees[chamber]:
            with open("./Dropbox/Coalitions/agreement/agree-" + committee + ".json", 'r') as agf:
                agreement_data = json.load(agf)

            for profile in match_data:
                fields = profile.split(',')
                # Match on chamber as well as committee: without the chamber
                # test, a committee name present in both chambers would have
                # its house profiles reset and recomputed with
                # chamber='senate' on the second pass.
                if fields[1] != committee or fields[2] != chamber:
                    continue

                rows = new_match_data[profile] = []
                for pair in match_data[profile]:
                    try:
                        agree = findAgreeRate(pair[0], pair[1], committee,
                                              chamber, congress - 1,
                                              agreement_data)
                        rows.append(pair + [str(agree[0])])
                    except Exception:
                        # No usable agreement value for this pair: leave it
                        # out of the output (best effort, as before).
                        continue

    return new_match_data


def getMatchData(cosponsorship, congress_range):
    """Build the matching dataset and write it to JSON, one file per congress.

    Runs in two passes over ``congress_range``:

    1. writes ``./Dropbox/Coalitions/match/basematch-<congress>.json`` with
       the pairs produced by ``_buildBaseMatch``;
    2. reads each of those files back, appends the agreement value to each
       pair via ``_addAgreement`` and writes
       ``./Dropbox/Coalitions/match/agmatch-<congress>.json``.

    Parameters:
        cosponsorship: mapping indexed by legislator; also passed through to
            ``commConnect``.
        congress_range: iterable of congress numbers (integer arithmetic is
            applied to each element).

    Returns None; the results are the files written.
    """
    # Materialise once: the range is walked twice, and a one-shot iterator
    # would otherwise silently skip the whole agreement pass.
    congress_range = list(congress_range)

    for congress in congress_range:
        match_data = _buildBaseMatch(cosponsorship, congress)
        with open('./Dropbox/Coalitions/match/basematch' + '-' + str(congress) + '.json', 'w') as outf:
            json.dump(match_data, outf, sort_keys=True, indent=4)

    # Add agreement
    for congress in congress_range:
        with open("./Dropbox/Coalitions/match/basematch" + "-" + str(congress) + ".json", 'r') as dataf:
            match_data = json.load(dataf)

        new_match_data = _addAgreement(match_data, congress)

        with open('./Dropbox/Coalitions/match/agmatch' + '-' + str(congress) + '.json', 'w') as outf:
            json.dump(new_match_data, outf, sort_keys=True, indent=4)